<?php
	// Declare the response as UTF-8 HTML before any markup is emitted.
	header('Content-Type: text/html; charset=utf-8');
?>


<html lang="zh" xml:lang="zh" xmlns="http://www.w3.org/1999/xhtml">
	<head>
		<meta http-equiv="content-type" content="text/html;charset=utf-8" />
	</head>
<body>
<form method="post" action="./<?php
	// PHP_SELF contains client-supplied path info, so it must be HTML-escaped
	// before being placed inside an attribute value.
	echo htmlspecialchars(basename($_SERVER['PHP_SELF']), ENT_QUOTES, 'UTF-8');
?>">
	<table>
		<tr>
			<td colspan="2">
				http://yande.re/post分类通用采集器
			</td>
		</tr>
		<tr>
			<td width="20%">
				分类地址:
			</td>
			<td>
				<input type="text" name="url" value="http://yande.re/post?page=1" />
			</td>
		</tr>
		<tr>
			<td colspan="2">
				<input type="submit" value="GO" />
			</td>
		</tr>
	</table>
</form>
</body>
</html>

<?php
	// Crawl driver. Runs after the form has been rendered: walks every listing
	// page of yande.re/post, collects the direct image links found on each page
	// and appends them to total<N>.lst files, one file per $base_pagenums pages.

	// Keep running even if the browser disconnects, and lift the time limit.
	ignore_user_abort(true);
	set_time_limit(0);

	// Directory that receives the total<N>.lst files (trailing separator included).
	$base_path = 'E:\\web\\hf\\crawler\\moe\\';

	// The submitted URL only acts as a "start" switch: a plain page view (no
	// POST data) or an empty field renders the form and stops here. The value
	// itself is not used for fetching; the crawl always targets $base_link.
	$link = isset($_POST['url']) ? $_POST['url'] : '';
	if (!is_string($link) || $link === '') {
		exit;
	}

	$base_link = 'http://yande.re/post?page=';
	// Number of listing pages whose links are grouped into one .lst file.
	$base_pagenums = 400;

	// Read the total page count from the paginator on the first listing page.
	$last_page_num = 0;
	$content = file_get_contents($base_link . '1');
	if ($content !== false
		&& preg_match("/<div id=\"paginator\">(.*)<div class=\"footer\"/isU", $content, $paginator)
		&& preg_match("/>(.*)<\/a> <a href=\"\/post\?page=2\">\&gt/isU", $paginator[0], $before_next)
	) {
		// The capture ends with the label of the link that precedes the "next"
		// arrow; everything before the final '>' is markup of earlier links.
		$pieces = explode('>', $before_next[1]);
		$last_page_num = (int) end($pieces);
	}
	if ($last_page_num < 1) {
		echo 'Could not determine the total page count from ' . $base_link . '1<br />';
	}

	// Collect the image links of every listing page.
	$links = '';
	$j = 0;
	for ($i = 1; $i <= $last_page_num; $i++) {
		echo '现在开始检查第' . $i . '页<br />';

		// ob_flush() raises a notice when no output buffer is active, and on
		// its own never reaches the client; flush() pushes the progress line out.
		if (ob_get_level() > 0) {
			ob_flush();
		}
		flush();

		$content = file_get_contents($base_link . $i);
		if ($content === false) {
			echo 'Failed to fetch page ' . $i . ', skipped<br />';
		} else {
			foreach (get_picture_links($content) as $v) {
				$links .= $v . "\r\n";
			}
		}

		// Write a batch after every $base_pagenums pages, and once more after
		// the final page if anything is still pending.
		$batch_full = ($i % $base_pagenums == 0);
		$is_last_with_rest = ($i == $last_page_num && $links !== '');
		if ($batch_full || $is_last_with_rest) {
			$target = $base_path . 'total' . $j . '.lst';
			if (file_put_contents($target, $links, FILE_APPEND) === false) {
				echo 'Failed to write ' . $target . '<br />';
			}
			$j++;
			$links = '';
		}
	}
			
		
	/**
	 * Extract the direct image URLs from one listing page.
	 *
	 * Only the first <ul id="post-list-posts"> block of the page is inspected;
	 * inside it, the href of every <a class="directlink largeimg"> anchor that
	 * is immediately followed by a <span> is collected, in document order.
	 *
	 * @param mixed $context HTML source of the listing page.
	 * @return array List of href values; empty when $context is not a string,
	 *               the post list is missing, or it contains no matching anchors.
	 */
	function get_picture_links($context){
		// A failed download (false/null) or any other non-string has nothing to scan.
		if (!is_string($context) || $context === '') {
			return array();
		}

		// Narrow the search to the post list so unrelated anchors are ignored.
		if (!preg_match('/<ul id="post\-list\-posts">(.*)<\/ul>/isU', $context, $list_match)) {
			return array();
		}

		// Group 1 of every match is the href value of a direct image link.
		if (!preg_match_all('/<a class="directlink largeimg" href="(.*)"><span>/isU', $list_match[0], $anchor_matches)) {
			return array();
		}

		return $anchor_matches[1];
	}
	
	
	// Tell the operator that the crawl has finished, then end the request.
	print '<br /><br /><br />DONE<br /><br /><br />';
	die();
?>